library(tidyverse)
library(rlang)
library(lubridate)
library(scales)
library(ggrepel)
library(glue)
library(rvest)
library(pander)
library(plotly)
library(QuantTools)
library(jsonlite)
panderOptions("big.mark", ",")
panderOptions("table.split.table", Inf)
panderOptions("table.style", "rmarkdown")
panderOptions("missing", "")
theme_set(theme_bw())
auStates <- c(
  ACT = "Australian Capital Territory",
  QLD = "Queensland",
  NSW = "New South Wales",
  VIC = "Victoria",
  SA = "South Australia",
  WA = "Western Australia",
  NT = "Northern Territory",
  TAS = "Tasmania",
  AUS = "All States"
)
ausPops <- tribble(
  ~State, ~Population,
  "New South Wales",    8117976,
  "Victoria", 6629870,
  "Queensland", 5115451,
  "South Australia", 1756494,
  "Western Australia", 2630557,
  "Tasmania", 535500,
  "Northern Territory", 245562,
  "Australian Capital Territory", 428060
) %>%
  bind_rows(
    tibble(
      State = "All States",
      Population = sum(.$Population)
    )
  )

Data Sources

data <- fromJSON("https://covidlive.com.au/covid-live.json") %>%
    as_tibble() %>%
    mutate(
      across(
        .cols = ends_with("CNT"), 
        .fns = as.numeric
      ),
      REPORT_DATE = ymd(REPORT_DATE)
    )
dt <- data %>%
    dplyr::filter(
        CODE == "AUS", !is.na(LAST_UPDATED_DATE)
    ) %>%
    pull(REPORT_DATE) %>%
    max()

International Data

International data and figures can be viewed here

Latest Australian Data

Australian State populations were taken from the ABS Website and were accurate in Sept 2019.

  • Using an estimated population size of 50,918,940, the total percentage of the Australian population confirmed as having been infected currently sits at 0.06%, or one person in every 1,771.
  • Within Victoria, that rises to one in every 324 having contracted the virus at some point
data %>%
    dplyr::filter(
        REPORT_DATE == dt
    ) %>%
    dplyr::mutate(
        Increase = CASE_CNT - PREV_CASE_CNT,
        `% Increase` = percent(Increase / PREV_CASE_CNT, accuracy = 0.1),
        `Fatality Rate` = percent(DEATH_CNT / CASE_CNT, accuracy = 0.1),
        `Recovery Rate` = percent(RECOV_CNT / CASE_CNT, accuracy = 0.1),
        State = case_when(
            CODE == "AUS" ~ "National Total",
            TRUE ~ auStates[CODE]
        ),
        State = factor(State, levels = c(dplyr::arrange(ausPops, desc(Population))$State, "National Total"))
    ) %>%
    dplyr::rename(
        Fatalities = DEATH_CNT,
        Recovered = RECOV_CNT,
        `Currently Active` = ACTIVE_CNT
    ) %>%
    dplyr::select(
        State,
        PREV_CASE_CNT, CASE_CNT,
        contains("Increase"),
        contains("Fatal"),
        contains("Recov", ignore.case = FALSE),
        `Currently Active`
    ) %>%
    dplyr::arrange(State) %>%
    setNames(
        str_replace_all(names(.), "PREV_CASE_CNT", as.character(dt - 1))
    ) %>%
    setNames(
        str_replace_all(names(.), "CASE_CNT", as.character(dt))
    ) %>%
    pander(
        justify = "lrrrrrrrrr",
        caption = paste(
            "*Confirmed cases, fatalities and recoveries reported by each state at time of preparation.*"
        ),
        emphasize.strong.rows = nrow(.)
    )
Confirmed cases, fatalities and recoveries reported by each state at time of preparation.
State 2021-01-21 2021-01-22 Increase % Increase Fatalities Fatality Rate Recovered Recovery Rate Currently Active
New South Wales 5,084 5,083 -1 0.0% 54 1.1% 3,236 63.7% 78
Victoria 20,433 20,434 1 0.0% 820 4.0% 19,583 95.8% 31
Queensland 1,300 1,303 3 0.2% 6 0.5% 1,261 96.8% 23
Western Australia 888 890 2 0.2% 9 1.0% 869 97.6% 12
South Australia 596 596 0 0.0% 4 0.7% 587 98.5% 5
Tasmania 234 234 0 0.0% 13 5.6% 221 94.4% 0
Australian Capital Territory 118 118 0 0.0% 3 2.5% 115 97.5% 0
Northern Territory 97 97 0 0.0% 0 0.0% 89 91.8% 8
National Total 28,750 28,755 5 0.0% 909 3.2% 25,961 90.3% 157

Plot of Current Australian Values

ausStatsCap <- "*Current confirmed and recovered cases, along with fatalities for Australia only. Active cases are shown as confirmed cases excluding fatalities and those classed as recovered. Some data regarding recovered cases prior to 1^st^ May 2020 may be estimates.*"
ggplotly(
  data %>% 
    dplyr::filter(CODE == "AUS", REPORT_DATE <= dt, REPORT_DATE > "2020-03-01") %>% 
    dplyr::select(
      Date = REPORT_DATE, 
      Confirmed = CASE_CNT,
      Active = ACTIVE_CNT, 
      Fatal = DEATH_CNT, 
      Recovered = RECOV_CNT
    ) %>% 
    mutate(
      Active = case_when(
        is.na(Active) ~ Confirmed - Fatal - Recovered,
        TRUE ~ Active
      )
    )  %>%
    pivot_longer(
      cols = c("Active", "Fatal", "Recovered"), 
      names_to = "Status", values_to = "Total"
    ) %>%
    mutate(
      Status = factor(Status, levels = c("Fatal", "Recovered", "Active"))
    ) %>%
    ggplot(aes(Date, Total, fill = Status)) +
    geom_col() +
    geom_line(
      data = . %>%
        group_by(Date) %>%
        summarise(
          Total = sum(Total)
        ) %>%
        mutate(Status = "Confirmed"),
      colour = "blue"
    ) +
    scale_fill_manual(
      values = c(
        Active = rgb(0, 0, 0),
        Confirmed = rgb(0, 0.3, 0.7),
        Fatal = rgb(0.8, 0.2, 0.2),
        Recovered = rgb(0.2, 0.7, 0.4)
      )
    ) +
    scale_x_date(expand = expansion(c(0, 0.03))) +
    scale_y_continuous(expand = expansion(c(0, 0.05))) +
    labs("Total Cases")
)

Current confirmed and recovered cases, along with fatalities for Australia only. Active cases are shown as confirmed cases excluding fatalities and those classed as recovered. Some data regarding recovered cases prior to 1st May 2020 may be estimates.

ggplotly(
  data %>% 
    dplyr::filter(CODE != "AUS", REPORT_DATE <= dt, REPORT_DATE > "2020-03-01") %>% 
    dplyr::select(
      CODE,
      Date = REPORT_DATE, 
      Confirmed = CASE_CNT,
      Active = ACTIVE_CNT, 
      Fatal = DEATH_CNT, 
      Recovered = RECOV_CNT
    ) %>% 
    mutate(
      Active = case_when(
        is.na(Active) ~ Confirmed - Fatal - Recovered,
        TRUE ~ Active
      ),
      State = auStates[CODE]
    )  %>%
    pivot_longer(
      cols = c("Active", "Fatal", "Recovered"), 
      names_to = "Status", values_to = "Total"
    ) %>%
    left_join(ausPops) %>%
    mutate(
      Status = factor(Status, levels = c("Fatal", "Recovered", "Active")),
      Rate = 1e6*Total / Population
    ) %>%
    ggplot(aes(Date, Rate, fill = Status, label = Total)) +
    geom_col() +
    geom_line(
      data = . %>%
        group_by(State, Date) %>%
        summarise(
          Rate = sum(Rate),
          Total = sum(Total)
        ) %>%
        mutate(Status = "Confirmed"),
      colour = "blue"
    ) +
    facet_wrap(~State, ncol = 4) + 
    scale_fill_manual(
      values = c(
        Active = rgb(0, 0, 0),
        Confirmed = rgb(0, 0.3, 0.7),
        Fatal = rgb(0.8, 0.2, 0.2),
        Recovered = rgb(0.2, 0.7, 0.4)
      )
    ) +
    scale_x_date(expand = expansion(c(0, 0.03))) +
    labs(y = "Rate (Cases / Million)")
)

Breakdown of individual states. Victorian recovered numbers began to be accurately reported from 22nd March, with other states gradually providing this information. NSW/QLD recovered cases have only recently begun being reported and up until the most recent dates, recovered/active values were very approximate for these states. The extreme drop for NSW active cases in early June is a function of the changed reporting strategy implemented by NSW Health.

Daily New Cases

ggplotly(
  data %>% 
    dplyr::select(
      State = CODE, date = REPORT_DATE, 
      confirmed = CASE_CNT, daily = NEW_CASE_CNT
    ) %>%
    mutate(
      State = auStates[State]
    ) %>%
    arrange(State, date) %>%
    group_by(State) %>%
    mutate(
      daily = c(0, diff(confirmed)),
      daily = ifelse(daily < 0, 0, daily),
      MA = round(sma(daily, 7), 2),
      MA2 = round(sma(daily, 14), 2),
      `Above Average` = MA > MA2
    ) %>%
    dplyr::filter(date > "2020-03-01") %>%
    ggplot(aes(date, daily)) +
    geom_col(
      aes(fill = `Above Average`, colour = `Above Average`),
      data = . %>% dplyr::filter(!is.na(`Above Average`)),
      width = 1/2
    ) +
    geom_line(aes(y = MA), colour = "blue") +
    geom_line(aes(y = MA2), colour = "black") +
    facet_wrap(~State, scales = "free_y") +
    labs(
      x = "Date",
      y = "Daily New Cases",
      fill = "\nAbove\nAverage"
    ) +
    scale_fill_manual(values = c("white", rgb(1, 0.2, 0.2))) +
    scale_colour_manual(values = c("grey50", rgb(1, 0.2, 0.2))),
  tooltip = c(
    "date", "daily", "MA"
  )
)

Daily new cases for each state shown against the 7-day (blue) and 14-day (black) averages. Days which the 7-day average is above the 14-day average are highlighted in red.

Australian Fatality Rate

inc <- 6
icu <- 11
d <- 7
offset <- icu + d 
minDate <- "2020-04-20"
list(
  data %>% 
    dplyr::filter(CODE == "AUS", REPORT_DATE > minDate) %>% 
    dplyr::select(date = REPORT_DATE, confirmed = CASE_CNT, deaths = DEATH_CNT)  %>% 
    arrange(date) %>% 
    mutate(fr = deaths / confirmed, type = "No Offset"),
  data %>% 
    dplyr::filter(CODE == "AUS") %>% 
    dplyr::select(date = REPORT_DATE, confirmed = CASE_CNT, deaths = DEATH_CNT)  %>% 
    arrange(date) %>% 
    mutate(
      confirmed = c(rep(NA, offset), confirmed[seq_len(nrow(.) - offset)]),
      fr = deaths / confirmed, 
      type = glue("Offset ({offset} days)")
    ) %>%
    dplyr::filter(date > minDate)
) %>%
  bind_rows() %>%
  ggplot(
    aes(date, fr, colour = type)
  ) +
  geom_line() +
  scale_x_date(
    expand = expansion(mult = 0, add = 20)
  ) +
  scale_y_continuous(label = percent) +
  labs(
    x = "Date",
    y = "Estimated Fatality Rate",
    colour = "Calculation"
  )
*Fatality rate for Australian cases as calculated using two methods.
Where no offset is included, the rate shown is simply the number of fatalities divided by the total number of reported cases on the same date.
When cases increase during a new outbreak, this will skew the fatality rate lower.
An alternative is to use an offset based on the fact the the median time from infection to symptom onset is 6 days, the median time from symptom onset to ICU admission is 11 days, and the median time from ICU admission to mortality is 7 days.
When using the offset, the fatality rate is calculated as the number of recorded fatalities on a given date, divided by by the number of cases from 18 days ago.
Whilst still flawed this may give a less biased estimate on the true fatality rate, and importantly, will always be higher than the alternative calculation.
The intial fatality rate spiked above 30% during the intial outbreak under the offset approach, and as such, data is only shown after 20 Apr, 2020.
All times used for estimation the offset were obtained from [here](https://wwwnc.cdc.gov/eid/article/26/6/20-0320_article)*

Fatality rate for Australian cases as calculated using two methods. Where no offset is included, the rate shown is simply the number of fatalities divided by the total number of reported cases on the same date. When cases increase during a new outbreak, this will skew the fatality rate lower. An alternative is to use an offset based on the fact the the median time from infection to symptom onset is 6 days, the median time from symptom onset to ICU admission is 11 days, and the median time from ICU admission to mortality is 7 days. When using the offset, the fatality rate is calculated as the number of recorded fatalities on a given date, divided by by the number of cases from 18 days ago. Whilst still flawed this may give a less biased estimate on the true fatality rate, and importantly, will always be higher than the alternative calculation. The intial fatality rate spiked above 30% during the intial outbreak under the offset approach, and as such, data is only shown after 20 Apr, 2020. All times used for estimation the offset were obtained from here

Current Growth Factor

n <- 14
minCases <- 1
cp <- glue(
  "*Growth factor for each State/Territory. 
  __Values are calculated using only locally-acquired cases__.
  In order to try and minimise volatility a {n} day simple moving average was used, in contrast to the 5 day average as advocated [here](https://www.abc.net.au/news/2020-04-10/coronavirus-data-australia-growth-factor-covid-19/12132478).
  This enables assessment of the growth factor over an entire quarantine period.
  This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak. 
  As a result, values are only shown when the {n}-day average of new __locally acquired cases__ exceeds {minCases}.*"
)
gf <- data %>%
  mutate(
    local = CASE_CNT - SRC_OVERSEAS_CNT,
    State = auStates[CODE]
  ) %>%
  dplyr::select(State, date = REPORT_DATE, local) %>%
  arrange(date) %>%
  dplyr::filter(
    date >= "2020-03-28",
    date <= dt
  ) %>%
  group_by(State) %>%
  mutate(
    new = c(0, diff(local)),
    new_ma = sma(new, n)
  ) %>%
  dplyr::filter(local > 0, !is.na(new_ma)) %>%
  mutate(
    R = c(NA, new_ma[-1] / new_ma[-n()]),
    R = case_when(
      is.nan(R) ~ NA_real_,
      new_ma < minCases ~ NA_real_,
      TRUE ~ R
    )
  ) %>%
  ungroup() %>%
  arrange(State)  %>%
  ggplot(aes(date, R, colour = State)) +
  geom_ribbon(aes(ymin = 1, ymax = R), alpha = 0.1) +
  geom_hline(yintercept = 1) +
  geom_label(
    aes(label = R),
    data = . %>%
      dplyr::filter(date == max(date), !is.na(R)) %>%
      mutate(R = round(R, 2), date = date + 1),
    fill = rgb(1, 1, 1, 0.3),
    show.legend = FALSE,
    nudge_y = 0.3,
    size = 4
  ) +
  labs(
    x = "Date", y = "Growth Factor"
  ) +
  facet_wrap(~State, scales = "free_x") +
  theme(legend.position = "none") +
  coord_cartesian(ylim = c(0.5, 1.8))
gf
*Growth factor for each State/Territory. 
__Values are calculated using only locally-acquired cases__.
In order to try and minimise volatility a 14 day simple moving average was used, in contrast to the 5 day average as advocated [here](https://www.abc.net.au/news/2020-04-10/coronavirus-data-australia-growth-factor-covid-19/12132478).
This enables assessment of the growth factor over an entire quarantine period.
This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak. 
As a result, values are only shown when the 14-day average of new __locally acquired cases__ exceeds 1.*

Growth factor for each State/Territory. Values are calculated using only locally-acquired cases. In order to try and minimise volatility a 14 day simple moving average was used, in contrast to the 5 day average as advocated here. This enables assessment of the growth factor over an entire quarantine period. This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak. As a result, values are only shown when the 14-day average of new locally acquired cases exceeds 1.

The current 14 day growth factor is 0.79 which gives some degree of confidence that the spread of infections is relatively under control.

Testing Within Each State

# tested %>% 
#   left_join(confirmed, by = c("State", "Country", "date") ) %>%
data %>%
  dplyr::filter(REPORT_DATE == dt) %>%
  mutate(
    State = auStates[CODE],
    TEST_CNT = case_when(
      is.na(TEST_CNT) ~ PREV_TEST_CNT,
      !is.na(TEST_CNT) ~ TEST_CNT
    )
  ) %>% 
  dplyr::select(date = REPORT_DATE, State, tests = TEST_CNT, confirmed = CASE_CNT)  %>%
  left_join(ausPops,  by = "State") %>%
  mutate(
    `Tests / '000` = round(1e3 * tests / Population, 2),
    Positive = confirmed / tests,
    Negative = 1 - Positive,
    isTotal = grepl("Total", State),
    State = str_replace(State, "All States", "National Total")
  ) %>%
  dplyr::select(
    State, Population,
    Confirmed = confirmed,
    Tests = tests, 
    contains("000"), 
    ends_with("ive"),
    isTotal
  ) %>%
  arrange(isTotal, desc(`Tests / '000`)) %>%
  dplyr::select(-isTotal) %>%
  dplyr::rename(
    `% Positive Tests` = Positive,
    `% Negative Tests` = Negative
  ) %>%
  mutate_at(
    vars(starts_with("%")), percent, accuracy = 0.01
  ) %>%
  split(f = .$State == "National Total") %>%
  bind_rows() %>%
  pander(
    justify = "lrrrrrr",
    missing = "",
    caption = glue(
      "*COVID-19 testing scaled by state population size.
      Confirmed cases are assumed to be the tests returning a positive result.
      The current numbers available for some states are a lower limit, and as such, the proportion of the population tested is likely to be higher, as is the proportion of tests returning a negative result.*"
    ),
    emphasize.strong.rows = nrow(.)
  )
COVID-19 testing scaled by state population size. Confirmed cases are assumed to be the tests returning a positive result. The current numbers available for some states are a lower limit, and as such, the proportion of the population tested is likely to be higher, as is the proportion of tests returning a negative result.
State Population Confirmed Tests Tests / '000 % Positive Tests % Negative Tests
Victoria 6,629,870 20,434 4,338,030 654.3 0.47% 99.53%
New South Wales 8,117,976 5,083 4,541,913 559.5 0.11% 99.89%
South Australia 1,756,494 596 935,333 532.5 0.06% 99.94%
Northern Territory 245,562 97 95,738 389.9 0.10% 99.90%
Australian Capital Territory 428,060 118 153,455 358.5 0.08% 99.92%
Queensland 5,115,451 1,303 1,715,747 335.4 0.08% 99.92%
Tasmania 535,500 234 154,963 289.4 0.15% 99.85%
Western Australia 2,630,557 890 707,133 268.8 0.13% 99.87%
National Total 25,459,470 28,755 12,642,312 496.6 0.23% 99.77%

R Session Information

R version 4.0.3 (2020-10-10)

**Platform:** x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=C, LC_NUMERIC=C, LC_TIME=C, LC_COLLATE=C, LC_MONETARY=C, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: jsonlite(v.1.7.2), QuantTools(v.0.5.7.1), data.table(v.1.13.6), plotly(v.4.9.3), pander(v.0.6.3), rvest(v.0.3.6), xml2(v.1.3.2), glue(v.1.4.2), ggrepel(v.0.9.0), scales(v.1.1.1), lubridate(v.1.7.9.2), rlang(v.0.4.10), forcats(v.0.5.0), stringr(v.1.4.0), dplyr(v.1.0.2), purrr(v.0.3.4), readr(v.1.4.0), tidyr(v.1.1.2), tibble(v.3.0.4), ggplot2(v.3.3.3) and tidyverse(v.1.3.0)

loaded via a namespace (and not attached): Rcpp(v.1.0.5), ps(v.1.5.0), assertthat(v.0.2.1), digest(v.0.6.27), R6(v.2.5.0), cellranger(v.1.1.0), backports(v.1.2.1), reprex(v.0.3.0), evaluate(v.0.14), highr(v.0.8), httr(v.1.4.2), pillar(v.1.4.7), lazyeval(v.0.2.2), curl(v.4.3), readxl(v.1.3.1), rstudioapi(v.0.13), rmarkdown(v.2.6), labeling(v.0.4.2), htmlwidgets(v.1.5.3), munsell(v.0.5.0), broom(v.0.7.3), compiler(v.4.0.3), modelr(v.0.1.8), xfun(v.0.20), pkgconfig(v.2.0.3), htmltools(v.0.5.0), tidyselect(v.1.1.0), fasttime(v.1.0-2), fansi(v.0.4.1), viridisLite(v.0.3.0), crayon(v.1.3.4), dbplyr(v.2.0.0), withr(v.2.3.0), grid(v.4.0.3), gtable(v.0.3.0), lifecycle(v.0.2.0), DBI(v.1.1.0), magrittr(v.2.0.1), cli(v.2.2.0), stringi(v.1.5.3), farver(v.2.0.3), fs(v.1.5.0), ellipsis(v.0.3.1), generics(v.0.1.0), vctrs(v.0.3.6), tools(v.4.0.3), hms(v.0.5.3), crosstalk(v.1.1.0.1), yaml(v.2.2.1), colorspace(v.2.0-0), knitr(v.1.30) and haven(v.2.3.1)